suppressPackageStartupMessages({
library(GenomicRanges)
library(epiwraps)
library(ggplot2)
library(rGREAT)
library(AnnotationHub)
library(ensembldb)
library(bsseq)
library(BiocParallel)
library(edgeR)
library(DMRcate)
library(rtracklayer)
library(sechm)
library(pheatmap)
library(viridis)
library(data.table)
})
set.seed(40)
Download:
options(timeout = 6000)
download.file("https://ethz-ins.org/content/w11_practical.zip", "w11_practical.zip")
dir.create("./w11_practical")
## Warning in dir.create("./w11_practical"): './w11_practical' already exists
unzip("w11_practical.zip", exdir="./w11_practical")
The .bigwig files have already been reduced to chromosome one and only have to be loaded here:
tracksGr <- list("ATAC"="./w11_practical/ATAC.rds",
"H3K27ac"="./w11_practical/H3K27ac.rds",
"H3K4me3"="./w11_practical/H3K4me3.rds",
"DNAme"="./w11_practical/DNAm.rds")
tracksGr <- lapply(tracksGr, readRDS)
ah <- AnnotationHub()
ensdb <- ah[["AH89211"]] # GRCm38
## loading from cache
Obtaining the promoter coordinates of chromosome 1:
chr1 <- GRanges(seqnames=Rle(c("1")),
ranges = IRanges(1, end=195471971))
# We define promoters as the regions +/- 200 of the TSS
tssMargin <- 200
promoterRegions <- promoters(ensdb, upstream=tssMargin, downstream=tssMargin,
filter=GRangesFilter(chr1))
gene body coordinates:
geneBodies <- genes(ensdb, columns=c("gene_seq_start", "gene_seq_end"),
filter=GRangesFilter(chr1))
promoterRegions <- promoterRegions[1:2000]
seqlevelsStyle(promoterRegions) <- "UCSC"
smp <- signal2Matrix(tracksGr, promoterRegions,
extend=1000, w=20,
type="center",
smooth=TRUE)
## Computing signal from GRanges 'ATAC'...
## Computing signal from GRanges 'H3K27ac'...
## Computing signal from GRanges 'H3K4me3'...
## Computing signal from GRanges 'DNAme'...
plotEnrichedHeatmaps(smp,
axis_name="TSS",
multiScale=TRUE,
use_raster=TRUE)
Clustering
cl <- clusterSignalMatrices(assays(smp)$input[,"DNAme", drop=FALSE], k=2)
## ~62% of the variance explained by clusters
table(cl)
## cl
## 1 2
## 1285 715
mycolors <- c("1"="#E69F00", "2"="#56B4E9") # row_split=cl, mean_color=mycolors
plotEnrichedHeatmaps(smp,
axis_name = c("TSS"),
row_split=cl,
scale_title="signal",
mean_color=mycolors,
multiScale=TRUE,
use_raster=TRUE)
For the colors see: Colorblind Color Palette (Discrete) and Scales
tracksDNAm <- readRDS("./w11_practical_extra/tracksDNAm_hb.rds")
bindingSites <- readRDS("./w11_practical_extra/ctcf_binding_sites_hb.rds")
smTfbs <- signal2Matrix(list("DNAm"=tracksDNAm),
bindingSites,
extend=1000, w=20,
type="scale", smooth=TRUE)
## Computing signal from GRanges 'DNAm'...
plotEnrichedHeatmaps(smTfbs,
axis_name = c("peak_start", "peak_end"),
use_raster=TRUE)
se <- readRDS("./w11_practical_extra/EnrichmentSE.rds")
plotEnrichedHeatmaps(se, multiScale=TRUE)
The Bisulfite-sequenncing (BS-seq) data we are looking is from the bsseqData package. It contains colon cancer samples with 3 patients with each a colon cancer and normal colon sample. Here we only look at chromosome 22.
library(bsseq)
bs <- readRDS("./w11_practical/bs.rds")
rowRanges(bs)
## GRanges object with 578097 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr22 16050097 *
## [2] chr22 16050114 *
## [3] chr22 16050174 *
## [4] chr22 16050206 *
## [5] chr22 16050213 *
## ... ... ... ...
## [578093] chr22 51244314 *
## [578094] chr22 51244503 *
## [578095] chr22 51244509 *
## [578096] chr22 51244532 *
## [578097] chr22 51244561 *
## -------
## seqinfo: 2 sequences from an unspecified genome; no seqlengths
pData(bs)
## DataFrame with 6 rows and 2 columns
## Type Pair
## <character> <character>
## C1 cancer pair1
## C2 cancer pair2
## C3 cancer pair3
## N1 normal pair1
## N2 normal pair2
## N3 normal pair3
Get annotations (hs):
# genes
ensdb <- ah[["AH109336"]]
## loading from cache
chr22 <- GRanges(seqnames=Rle(c("22")),
ranges = IRanges(1, end=50818468))
genesChr22 <- genes(ensdb, columns=c("gene_seq_start", "gene_seq_end", "gene_name"),
filter=GRangesFilter(chr22))
seqlevelsStyle(genesChr22) <- "UCSC"
# promoters
tssMargin <- 200
promotersChr22 <- promoters(ensdb, upstream=tssMargin, downstream=tssMargin,
filter=GRangesFilter(chr22), columns=c("gene_name"))
seqlevelsStyle(promotersChr22) <- "UCSC"
Retrieve metyhlation levels and visualize:
metPr <- bsseq::getMeth(bs,
regions=promotersChr22[1:100],
what="perRegion")
colnames(metPr) <- colnames(bs)
rownames(metPr) <- promotersChr22$gene_name[1:100]
metPr <- metPr[!is.na(rowSums(metPr)),]
library(viridis)
library(pheatmap)
annotationCol <- as.data.frame(pData(bs))
rownames(annotationCol) <- colnames(metPr)
pheatmap::pheatmap(metPr,
cluster_rows=TRUE,
cluster_cols=FALSE,
annotation_col=annotationCol,
show_rownames = TRUE,
color=rocket(10))
Differential methylation testing:
# design matrix
pData(bs)$Type <- relevel(as.factor(pData(bs)$Type), ref="normal")
design <- model.matrix(~Type+Pair, data=pData(bs))
# adapt for methylation data
methdesign <- modelMatrixMeth(design)
seqAnnot <- sequencing.annotate(bs, methdesign,
all.cov=TRUE,
coef="Typecancer")
## Filtering out all CpGs where at least one sample has zero coverage...
## Processing BSseq object...
## Transforming counts...
## Fitting model...
## Your contrast returned no individually significant CpGs. Consider increasing the 'fdr' parameter using changeFDR(), but be warned there is an increased risk of Type I errors.
dmrcateRes <- dmrcate(seqAnnot,
C=2,
min.cpgs=5,
pcutoff=0.05) #caution!
## Fitting chr22...
## Demarcating regions...
## Done!
dmrRanges <- extractRanges(dmrcateRes, genome="hg38")
## see ?DMRcatedata and browseVignettes('DMRcatedata') for documentation
## loading from cache
saveRDS(dmrRanges, "./w11_practical/dmr.rds")
dmrRanges <- dmrRanges[order(abs(dmrRanges$meandiff), decreasing=TRUE)]
DMR.plot(dmrRanges, dmr=1, phen.col=c(rep(mycolors[1], 3),
rep(mycolors[2], 3)),
group.means=TRUE,
CpGs=bs, genome="hg38")
## Ensembl site unresponsive, trying useast mirror
dmrRangesGenes <- dmrRanges[!is.na(dmrRanges$overlapping.genes)]
Obtain the coordinates of the genes within DMRs.
# Get the genes within Differentially methylated regions
topIdx <- order(dmrRangesGenes$min_smoothed_fdr)[1:10]
genesDmr <- unlist(tstrsplit(dmrRangesGenes[topIdx]$overlapping.genes, split=", "))
genesDmr <- genesDmr[!is.na(genesDmr)]
dmrGenes <- genesChr22[genesChr22$gene_name %in% genesDmr]
dmrGenes
## GRanges object with 15 ranges and 2 metadata columns:
## seqnames ranges strand | gene_name
## <Rle> <IRanges> <Rle> | <character>
## ENSG00000093009 chr22 19479457-19520612 + | CDC45
## ENSG00000243902 chr22 37339583-37427445 - | ELFN2
## ENSG00000166897 chr22 37367960-37427479 - | ELFN2
## ENSG00000176177 chr22 39743044-39893864 - | ENTHD1
## ENSG00000138964 chr22 44172956-44219533 + | PARVG
## ... ... ... ... . ...
## ENSG00000077942 chr22 45502238-45601135 + | FBLN1
## ENSG00000130943 chr22 46255663-46263343 - | PKDREJ
## ENSG00000100416 chr22 46330875-46357340 + | TRMU
## ENSG00000075275 chr22 46360834-46537620 - | CELSR1
## ENSG00000205632 chr22 48866770-48898386 + | LINC01310
## gene_id
## <character>
## ENSG00000093009 ENSG00000093009
## ENSG00000243902 ENSG00000243902
## ENSG00000166897 ENSG00000166897
## ENSG00000176177 ENSG00000176177
## ENSG00000138964 ENSG00000138964
## ... ...
## ENSG00000077942 ENSG00000077942
## ENSG00000130943 ENSG00000130943
## ENSG00000100416 ENSG00000100416
## ENSG00000075275 ENSG00000075275
## ENSG00000205632 ENSG00000205632
## -------
## seqinfo: 1 sequence from hg38 genome